library("tidyverse")
library("ComplexHeatmap")
library("CTexploreR")
library("circlize")
library("DESeq2")
library("SummarizedExperiment")
library("SingleCellExperiment")
library("DropletUtils")
library("scater")
library("scran")
library("scuttle")

legends_param <- list(
  labels_gp = gpar(col = "black", fontsize = 3),
  title_gp = gpar(col = "black", fontsize = 3, fontface = "bold"),
  simple_anno_size = unit(0.1, "cm"),
  row_names_gp = gpar(fontsize = 1),
  annotation_name_side = "left",
  border = FALSE,
  border_gp = gpar(lwd = 0.1),
  grid_width = unit(0.1, "cm"),
  grid_height = unit(0.01, "cm"),
  legend_height = unit(0.1, "cm"))
CT_genes_X_met <- all_genes %>% 
  filter(CT_gene_type == "CT_gene") %>% 
  filter(X_linked, regulated_by_methylation) %>% 
  pull(external_gene_name)

Single-Cell RNA-Seq Reveals Lineage and X Chromosome Dynamics in Human Preimplantation Embryos Petropoulos et al, Cell 2016

Quality control

# Start from raw counts
load(file = "/storage/research/dduv/cbio-lg/cluster/DataSets/early_embryos/Petropoulos_dataset/data/Petropoulos_sce_raw_counts.rda")
sce <- petropoulos_sce_raw_counts
rm(petropoulos_sce_raw_counts)
assayNames(sce) <- "counts"

table(sce$lineage, sce$day)
##                     
##                       E3  E4  E5  E6  E7
##   epiblast             0   0  41  45  41
##   not applicable      81 190 152   0   0
##   primitive endoderm   0   0  32  39  37
##   trophectoderm        0   0 142 293 388

Check sex attribute for each indivisual

as_tibble(colData(sce)) %>% 
  filter(day == "E3") %>% 
  ggplot(aes(x = individual, y = gender, color = gender)) +
  geom_jitter(width = 0.1)+
  theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~ day)

# "E3.1", "E3.2", "E3.3" are ambigous
sce <- sce[, !sce$individual %in% c("E3.1", "E3.2", "E3.3")]

as_tibble(colData(sce)) %>% 
  filter(day == "E4") %>% 
  ggplot(aes(x = individual, y = gender, color = gender)) +
  geom_jitter(width = 0.1)+
  theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~ day)

as_tibble(colData(sce)) %>% 
  filter(day == "E5") %>% 
  ggplot(aes(x = individual, y = gender, color = gender)) +
  geom_jitter(width = 0.1)+
  theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~ day)

# "E5.40" is likely a Female
sce$gender[sce$individual == "E5.40"] <- "F"


as_tibble(colData(sce)) %>% 
  filter(day == "E6") %>% 
  ggplot(aes(x = individual, y = gender, color = gender)) +
  geom_jitter(width = 0.1)+
  theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~ day)

# "E6.7" is likely a Male
sce$gender[sce$individual == "E6.7"] <- "M"

as_tibble(colData(sce)) %>% 
  filter(day == "E7") %>% 
  ggplot(aes(x = individual, y = gender, color = gender)) +
  geom_jitter(width = 0.1)+
  theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~ day)

# No mitochondrial genes (not in GTF or in genome ref used for alignment?)
# grep("^MT-", x = rownames(sce), value = TRUE)

sce <- sce[rowSums(assay(sce)) > 0,]

sce$gender <- factor(sce$gender, levels = c("M", "F"))


rowData(sce)$is_mito <- FALSE
rowData(sce)$is_mito[grep(pattern = "^MT-", x = rowData(sce)$gene, value = FALSE)] <- TRUE
sce <- addPerCellQC(sce, subsets=list(Mito = rowData(sce)$is_mito))
# No MT genes
#table(sce$subsets_Mito_percent)

table(sce$day, sce$gender)
##     
##        M   F
##   E3  35  28
##   E4  98  92
##   E5 206 161
##   E6 142 235
##   E7 176 290

Number of detected genes

thr_detection <- 7500

thr_detection <- 0

# Define outliers with low detection level
#outliers <- colnames(sce)[isOutlier(sce$detected, type = "lower")]
outliers <- colnames(sce)[sce$detected < thr_detection]
##outliers

sce$outlier_detected <- FALSE
sce$outlier_detected[sce$sample %in% outliers] <- TRUE

f1 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = detected)) +
  geom_jitter(aes(x = 1, y = detected, color = outlier_detected)) +
  geom_violin(alpha = 0.5, outliers = F) +
  geom_hline(yintercept = thr_detection, linetype = "dashed") +
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
                axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  ggtitle("Number of detected genes")

# as_tibble(colData(sce)) %>% 
#   ggplot(aes(x = 1, y = detected)) +
#   geom_jitter(aes(x = 1, y = detected, color = source_name)) +
#   geom_violin(alpha = 0.5, outliers = F)

f2 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = detected)) +
  geom_jitter(aes(x = 1, y = detected, color = day)) +
  geom_violin(alpha = 0.5, outliers = F) +
    #scale_color_manual(values = c("blue", "cyan3"))+
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
                axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())+
  ggtitle("Number of detected genes")





gridExtra::grid.arrange(f2, f1, ncol = 3)

Sequencing depth

thr_sum <- 1000000
thr_sum <- 0
#Define outliers with low detection level
#outliers <- colnames(sce)[isOutlier(sce$sum, type = "lower")]
outliers <- colnames(sce)[sce$sum < thr_sum]


sce$outlier_sum <- FALSE
sce$outlier_sum[sce$sample %in% outliers] <- TRUE

f1 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = sum)) +
  geom_jitter(aes(x = 1, y = sum, color = outlier_sum)) +
  geom_violin(alpha = 0.5, outliers = F) +
  geom_hline(yintercept = thr_sum, linetype = "dashed") +
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
                axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  ggtitle("Total number of reads")


f3 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = sum)) +
  geom_jitter(aes(x = 1, y = sum, color = day)) +
  geom_violin(alpha = 0.5, outliers = F) +
  #geom_hline(yintercept = thr_sum, linetype = "dashed") +
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
                axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  ggtitle("Total number of reads")

# as_tibble(colData(sce)) %>% 
#   ggplot(aes(x = 1, y = detected)) +
#   geom_jitter(aes(x = 1, y = detected, color = source_name)) +
#   geom_violin(alpha = 0.5, outliers = F)



gridExtra::grid.arrange(f3, f1, ncol = 2)

Ouliers identification

outliers <- as_tibble(colData(sce)) %>% 
  filter(outlier_sum | outlier_detected) %>% 
  pull(sample)

sce$outlier <- FALSE
sce$outlier[sce$sample %in% outliers] <- TRUE

# Define outliers with low sum value
f1 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = detected)) +
  geom_jitter(aes(x = 1, y = detected, color = outlier)) +
  geom_violin(alpha = 0.5, outliers = F) +
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) 

f2 <- as_tibble(colData(sce)) %>% 
  ggplot(aes(x = 1, y = sum)) +
  geom_jitter(aes(x = 1, y = sum, color = outlier)) +
  geom_violin(alpha = 0.5, outliers = F) +
  theme(legend.position = "bottom",
        legend.title.position =  "top",
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) 

gridExtra::grid.arrange(f1, f2, ncol = 2)

=> Removed outlier cells

table(outlier =sce$outlier)
## outlier
## FALSE 
##  1463
sce <- sce[, !sce$outlier]

=> 1463 cells kept

table(sce$day, sce$gender)
##     
##        M   F
##   E3  35  28
##   E4  98  92
##   E5 206 161
##   E6 142 235
##   E7 176 290

Normalisation

## Normalisation
sce <- logNormCounts(sce, transform = "none")
sce <- logNormCounts(sce) 
## Feature selection
hvg <- modelGeneVar(sce)
fit_hvg <- metadata(hvg)
# plot(fit_hvg$mean, fit_hvg$var,
#      xlab = "Mean of log-expression",
#      ylab = "Variance of log-expression",
#      col = ifelse(hvg$p.value < 0.1, "red", "black"))
# curve(fit_hvg$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

topHvg <- getTopHVGs(hvg, n = 500)

rowSubset(sce) <- topHvg


## PCA
set.seed(123)
sce <- fixedPCA(sce, rank = 100, subset.row = topHvg) # rank = 50 by default
plotPCA(sce, colour_by = "day", shape = "lineage")

plotPCA(sce, colour_by = "gender")

plotPCA(sce, colour_by = "lineage")

idem but focus on E4

n_cells_per_individual <- enframe(table(sce$individual), name = "individual", value = "n_cells")

sce_reduced <- sce[, sce$day == "E4"]
hvg <- modelGeneVar(sce_reduced)
fit_hvg <- metadata(hvg)
# plot(fit_hvg$mean, fit_hvg$var,
#      xlab = "Mean of log-expression",
#      ylab = "Variance of log-expression",
#      col = ifelse(hvg$p.value < 0.1, "red", "black"))
# curve(fit_hvg$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

topHvg <- getTopHVGs(hvg, n = 500)

rowSubset(sce_reduced) <- topHvg


# plotPCA(sce_reduced, colour_by = "day", shape = "lineage")
# plotPCA(sce_reduced, colour_by = "gender")

selected <- n_cells_per_individual %>% filter(n_cells> 5) %>% pull(individual)
plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "individual") 

idem but focus on E5

n_cells_per_individual <- enframe(table(sce$individual), name = "individual", value = "n_cells")

sce_reduced <- sce[, sce$day == "E5"]
hvg <- modelGeneVar(sce_reduced)
fit_hvg <- metadata(hvg)
# plot(fit_hvg$mean, fit_hvg$var,
#      xlab = "Mean of log-expression",
#      ylab = "Variance of log-expression",
#      col = ifelse(hvg$p.value < 0.1, "red", "black"))
# curve(fit_hvg$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

topHvg <- getTopHVGs(hvg, n = 500)

rowSubset(sce_reduced) <- topHvg

# 
# plotPCA(sce_reduced, colour_by = "day", shape = "lineage")
# plotPCA(sce_reduced, colour_by = "gender")

selected <- n_cells_per_individual %>% filter(n_cells> 10) %>% pull(individual)
plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "individual") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "gender") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "sum") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "detected") 

idem but focus on E6

n_cells_per_individual <- enframe(table(sce$individual), name = "individual", value = "n_cells")

sce_reduced <- sce[, sce$day == "E6"]
hvg <- modelGeneVar(sce_reduced)
fit_hvg <- metadata(hvg)
# plot(fit_hvg$mean, fit_hvg$var,
#      xlab = "Mean of log-expression",
#      ylab = "Variance of log-expression",
#      col = ifelse(hvg$p.value < 0.1, "red", "black"))
# curve(fit_hvg$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

topHvg <- getTopHVGs(hvg, n = 500)

rowSubset(sce_reduced) <- topHvg


# plotPCA(sce_reduced, colour_by = "day", shape = "lineage")
# plotPCA(sce_reduced, colour_by = "gender")

selected <- n_cells_per_individual %>% filter(n_cells> 10) %>% pull(individual)
plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "individual") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "gender") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "lineage") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "sum") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "detected") 

idem but focus on E7

n_cells_per_individual <- enframe(table(sce$individual), name = "individual", value = "n_cells")

sce_reduced <- sce[, sce$day == "E7"]
hvg <- modelGeneVar(sce_reduced)
fit_hvg <- metadata(hvg)
# plot(fit_hvg$mean, fit_hvg$var,
#      xlab = "Mean of log-expression",
#      ylab = "Variance of log-expression",
#      col = ifelse(hvg$p.value < 0.1, "red", "black"))
# curve(fit_hvg$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

topHvg <- getTopHVGs(hvg, n = 500)

rowSubset(sce_reduced) <- topHvg


#plotPCA(sce_reduced, colour_by = "day", shape = "lineage")
#plotPCA(sce_reduced, colour_by = "gender")

selected <- n_cells_per_individual %>% filter(n_cells> 10) %>% pull(individual)
plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "individual") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "gender") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "lineage") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "sum") 

plotPCA(sce_reduced[, sce_reduced$individual %in% selected], colour_by = "detected") 

table(sce$lineage, sce$day, sce$gender)
## , ,  = M
## 
##                     
##                       E3  E4  E5  E6  E7
##   epiblast             0   0  17   4  19
##   not applicable      35  98  99   0   0
##   primitive endoderm   0   0  18  14  23
##   trophectoderm        0   0  72 124 134
## 
## , ,  = F
## 
##                     
##                       E3  E4  E5  E6  E7
##   epiblast             0   0  24  41  22
##   not applicable      28  92  53   0   0
##   primitive endoderm   0   0  14  25  14
##   trophectoderm        0   0  70 169 254

CG genes Heatmaps

exploratory Heatmaps

legends_param <- list(
  labels_gp = gpar(col = "black", fontsize = 10),
  title_gp = gpar(col = "black", fontsize = 10, fontface = "bold"),
  simple_anno_size = unit(0.1, "cm"),
  row_names_gp = gpar(fontsize = 8),
  annotation_name_side = "left",
  border = FALSE,
  border_gp = gpar(lwd = 0.1),
  grid_width = unit(0.3, "cm"),
  grid_height = unit(0.3, "cm"),
  legend_height = unit(0.3, "cm"))

legend_colors <- c(
  "#5E4FA2", "#3288BD", "#66C2A5", "#ABDDA4", "#E6F598",
  "#FFFFBF", "#FEE08B", "#FDAE61", "#F46D43", "#D53E4F",
  "#9E0142")

ensembl_gene <- data.frame(gene = all_genes$external_gene_name)
rownames(ensembl_gene) <- all_genes$ensembl_gene_id

draw_embryo_exp_Petropoulos_dataset <- function(genes, 
                            detected_in_percent_of_cells = 0,
                            day = NULL,
                            font_size = 6,
                            gender = NULL,
                            scale = FALSE,
                            assayName = "normcounts",
                            scale_lims = NULL,
                            clustRow = TRUE,
                            h_width = 8, 
                            h_height = 8, 
                            show_right_annot = TRUE,
                            clust_method = "centroid"){
  

  
  # Top Annotations
  df_col <- colData(sce)
  df_col <- df_col[!is.na(df_col$gender),]
  if (!is.null(day)) df_col <- df_col[df_col$day %in% day, ]
  df_col$sex <- df_col$gender
  levels(df_col$sex) <- c("Male", "Female")
  df_col <- df_col[order(df_col$day, df_col$sex, df_col$individual, df_col$lineage),]
  df_col$group <- paste0(df_col$day, "_", df_col$gender)
  
  
  column_ha_sex = HeatmapAnnotation(
    sex = df_col$sex,
    col = list(sex = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_individual = HeatmapAnnotation(
    individual = df_col$individual,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_lineage = HeatmapAnnotation(
    lineage = df_col$lineage,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_day = HeatmapAnnotation(
    day = df_col$day,
    #col = list(Stage = c("ICM" = "gray", "TE" = "gray10")),
    border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_sum = HeatmapAnnotation(
    sum = df_col$sum,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_detected = HeatmapAnnotation(
    detected = df_col$detected,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
  n <- which(assayNames(sce) == assayName)
  mat <- assay(sce, i = n)[genes, ]
  
  # Show only genes detected in more than x% of cells
  mat <- mat[rowSums(mat > 0) >= dim(mat)[2]*detected_in_percent_of_cells,]
  # gene_type <- CT_genes_types %>% 
  #   filter(gene %in% genes) %>% 
  #   mutate(gene = factor(gene, levels = genes)) %>% 
  #   arrange(gene)
  
  if (scale) {
    mat <- t(scale(t(mat), center = FALSE, scale = TRUE))
    assayName = "scaled_exp"
    }
  
  
  if (is.null(scale_lims)) scale_lims <- c(0, quantile(mat, .9))
  heatmap_colors <- colorRamp2(
    seq(scale_lims[1], scale_lims[2], length = 11), legend_colors)
  
  lines <- tibble(gene = rownames(mat)) %>% 
    mutate(group = case_when(gene %in% CT_genes$external_gene_name ~ "X-linked CG MethDep",
                             !gene %in% CT_genes$external_gene_name ~ "Control")) %>% 
    mutate(group = factor(group, levels = c("X-linked CG MethDep", "Control")))
  
  ht1 <- Heatmap(mat[, rownames(df_col)],
                  name = assayName,
                 clustering_method_rows = "ward.D2",
                 clustering_method_columns = "ward.D2",
                 column_split = df_col$group,
                 row_split = lines$group,
                 row_gap = unit(c(1), "mm"),
                 col = heatmap_colors,
                 cluster_rows = clustRow,
                 cluster_columns = FALSE,
                 cluster_row_slices = FALSE,
                 cluster_column_slices = FALSE,
                 show_row_names = TRUE,
                 show_column_names = FALSE,
                 show_row_dend = FALSE,
                 column_names_gp = gpar(fontsize = 8),
                 column_names_centered = TRUE,
                 row_names_gp = gpar(fontsize = font_size),
                 row_names_side = "left",
                 border = TRUE,
                 border_gp = gpar(lwd = 0.5),
                 column_names_side = c("bottom"),
                 column_names_rot = 90,
                 column_title_gp = gpar(fontsize = 8),
                 row_title_gp = gpar(fontsize = 0),
                 top_annotation = c(column_ha_day,
                                    column_ha_sex,
                                    column_ha_detected,
                                    column_ha_lineage,
                                    column_ha_sum,
                                    column_ha_individual,
                                    # column_ha_sample,
                                    gap = unit(0.1, "mm")),
                 use_raster = TRUE,
                 raster_device = "CairoPNG",
                 raster_quality = 10,
                 heatmap_legend_param = legends_param,
                 width = unit(h_width, "cm"),
                 height = unit(h_height, "cm"))
      
  draw(ht1, merge_legend = TRUE)
}

my_genes <- all_genes %>% 
  filter(CT_gene_type == "CT_gene") %>% 
  filter(X_linked, regulated_by_methylation) %>% 
  filter(external_gene_name %in% rownames(sce)) 

controls <- all_genes %>% 
  filter(external_gene_name %in% c(#"GAPDH", "ACTB", "POU5F1", "XIST", 
    "DDX3Y", "RPS4Y1", "EIF1AY")) 

X_linked <- all_genes %>% 
  filter(chr == "X") %>% 
  filter(external_gene_name %in% rownames(sce)) %>% 
  filter(!external_gene_name %in% CT_genes$external_gene_name) %>% 
  pull(external_gene_name) 

chr1_linked <- all_genes %>% 
  filter(chr == 1) %>% 
  filter(external_gene_name %in% rownames(sce)) %>% 
  filter(!external_gene_name %in% CT_genes$external_gene_name) %>% 
  pull(external_gene_name) 
draw_embryo_exp_Petropoulos_dataset <- function(genes, 
                            detected_in_percent_of_cells = 0,
                            day = NULL,
                            font_size = 6,
                            gender = NULL,
                            scale = FALSE,
                            assayName = "normcounts",
                            scale_lims = NULL,
                            clustRow = TRUE,
                            h_width = 8, 
                            h_height = 8, 
                            show_right_annot = TRUE,
                            clust_method = "centroid"){
  

  
  # Top Annotations
  df_col <- colData(sce)
  df_col <- df_col[!is.na(df_col$gender),]
  if (!is.null(day)) df_col <- df_col[df_col$day %in% day, ]
  df_col$sex <- df_col$gender
  levels(df_col$sex) <- c("Male", "Female")
  df_col <- df_col[order(df_col$day, df_col$sex, df_col$individual, df_col$lineage),]
  df_col$group <- paste0(df_col$day, "_", df_col$gender)
  
  
  column_ha_sex = HeatmapAnnotation(
    sex = df_col$sex,
    col = list(sex = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_individual = HeatmapAnnotation(
    individual = df_col$individual,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_lineage = HeatmapAnnotation(
    lineage = df_col$lineage,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_day = HeatmapAnnotation(
    day = df_col$day,
    #col = list(Stage = c("ICM" = "gray", "TE" = "gray10")),
    border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_sum = HeatmapAnnotation(
    sum = df_col$sum,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_detected = HeatmapAnnotation(
    detected = df_col$detected,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
  n <- which(assayNames(sce) == assayName)
  mat <- assay(sce, i = n)[genes, ]
  
  # Show only genes detected in more than x% of cells
  mat <- mat[rowSums(mat > 0) >= dim(mat)[2]*detected_in_percent_of_cells,]
  # gene_type <- CT_genes_types %>% 
  #   filter(gene %in% genes) %>% 
  #   mutate(gene = factor(gene, levels = genes)) %>% 
  #   arrange(gene)
  
  if (scale) {
    mat <- t(scale(t(mat), center = FALSE, scale = TRUE))
    assayName = "scaled_exp"
    }
  
  
  if (is.null(scale_lims)) scale_lims <- c(0, quantile(mat, .9))
  heatmap_colors <- colorRamp2(
    seq(scale_lims[1], scale_lims[2], length = 11), legend_colors)
  
  lines <- tibble(gene = rownames(mat)) %>% 
    mutate(group = case_when(gene %in% CT_genes$external_gene_name ~ "X-linked CG MethDep",
                             !gene %in% CT_genes$external_gene_name ~ "Control")) %>% 
    mutate(group = factor(group, levels = c("X-linked CG MethDep", "Control")))
  
  ht1 <- Heatmap(mat[, rownames(df_col)],
                  name = assayName,
                 clustering_method_rows = "ward.D2",
                 clustering_method_columns = "ward.D2",
                 column_split = df_col$group,
                 row_split = lines$group,
                 row_gap = unit(c(1), "mm"),
                 col = heatmap_colors,
                 cluster_rows = clustRow,
                 cluster_columns = FALSE,
                 cluster_row_slices = FALSE,
                 cluster_column_slices = FALSE,
                 show_row_names = TRUE,
                 show_column_names = FALSE,
                 show_row_dend = FALSE,
                 column_names_gp = gpar(fontsize = 8),
                 column_names_centered = TRUE,
                 row_names_gp = gpar(fontsize = font_size),
                 row_names_side = "left",
                 border = TRUE,
                 border_gp = gpar(lwd = 0.5),
                 column_names_side = c("bottom"),
                 column_names_rot = 90,
                 column_title_gp = gpar(fontsize = 8),
                 row_title_gp = gpar(fontsize = 0),
                 top_annotation = c(column_ha_day,
                                    column_ha_sex,
                                    # column_ha_sample,
                                    gap = unit(0.1, "mm")),
                 use_raster = TRUE,
                 raster_device = "CairoPNG",
                 raster_quality = 10,
                 heatmap_legend_param = legends_param,
                 width = unit(h_width, "cm"),
                 height = unit(h_height, "cm"))
      
  draw(ht1, merge_legend = TRUE)
}

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, #"ACTB", "GAPDH", 
                                    "RPS26", "XIST"),
                                    #day = "E6",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, 
                                    controls$external_gene_name, #"ACTB", "GAPDH", 
                                    "RPS26", "XIST"),
                                    #day = "E3",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

E3 embryos

draw_embryo_exp_Petropoulos_dataset <- function(genes, 
                            detected_in_percent_of_cells = 0,
                            day = NULL,
                            font_size = 6,
                            gender = NULL,
                            scale = FALSE,
                            assayName = "normcounts",
                            scale_lims = NULL,
                            clustRow = TRUE,
                            h_width = 8, 
                            h_height = 8, 
                            show_right_annot = TRUE,
                            clust_method = "centroid"){
  

  
  # Top Annotations
  df_col <- colData(sce)
  df_col <- df_col[!is.na(df_col$gender),]
  if (!is.null(day)) df_col <- df_col[df_col$day %in% day, ]
  df_col$sex <- df_col$gender
  levels(df_col$sex) <- c("Male", "Female")
  df_col <- df_col[order(df_col$day, df_col$sex, df_col$individual, df_col$lineage),]
  df_col$group <- paste0(df_col$day, "_", df_col$gender)
  
  
  column_ha_sex = HeatmapAnnotation(
    sex = df_col$sex,
    col = list(sex = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_individual = HeatmapAnnotation(
    individual = df_col$individual,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_lineage = HeatmapAnnotation(
    lineage = df_col$lineage,
    #col = list(individual = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
    column_ha_day = HeatmapAnnotation(
    day = df_col$day,
    #col = list(Stage = c("ICM" = "gray", "TE" = "gray10")),
    border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_sum = HeatmapAnnotation(
    sum = df_col$sum,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
    column_ha_detected = HeatmapAnnotation(
    detected = df_col$detected,
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
  n <- which(assayNames(sce) == assayName)
  mat <- assay(sce, i = n)[genes, ]
  
  # Show only genes detected in more than x% of cells
  mat <- mat[rowSums(mat > 0) >= dim(mat)[2]*detected_in_percent_of_cells,]
  # gene_type <- CT_genes_types %>% 
  #   filter(gene %in% genes) %>% 
  #   mutate(gene = factor(gene, levels = genes)) %>% 
  #   arrange(gene)
  
  if (scale) {
    mat <- t(scale(t(mat), center = FALSE, scale = TRUE))
    assayName = "scaled_exp"
    }
  
  
  if (is.null(scale_lims)) scale_lims <- c(0, quantile(mat, .9))
  heatmap_colors <- colorRamp2(
    seq(scale_lims[1], scale_lims[2], length = 11), legend_colors)
  
  lines <- tibble(gene = rownames(mat)) %>% 
    mutate(group = case_when(gene %in% CT_genes$external_gene_name ~ "X-linked CG MethDep",
                             !gene %in% CT_genes$external_gene_name ~ "Control")) %>% 
    mutate(group = factor(group, levels = c("X-linked CG MethDep", "Control")))
  
  ht1 <- Heatmap(mat[, rownames(df_col)],
                  name = assayName,
                 clustering_method_rows = "ward.D2",
                 clustering_method_columns = "ward.D2",
                 column_split = df_col$group,
                 row_split = lines$group,
                 row_gap = unit(c(1), "mm"),
                 col = heatmap_colors,
                 cluster_rows = clustRow,
                 cluster_columns = FALSE,
                 cluster_row_slices = FALSE,
                 cluster_column_slices = FALSE,
                 show_row_names = TRUE,
                 show_column_names = FALSE,
                 show_row_dend = FALSE,
                 column_names_gp = gpar(fontsize = 8),
                 column_names_centered = TRUE,
                 row_names_gp = gpar(fontsize = font_size),
                 row_names_side = "left",
                 border = TRUE,
                 border_gp = gpar(lwd = 0.5),
                 column_names_side = c("bottom"),
                 column_names_rot = 90,
                 column_title_gp = gpar(fontsize = 8),
                 row_title_gp = gpar(fontsize = 0),
                 top_annotation = c(column_ha_day,
                                    column_ha_sex,
                                    column_ha_detected,
                                    column_ha_lineage,
                                    column_ha_sum,
                                    column_ha_individual,
                                    # column_ha_sample,
                                    gap = unit(0.1, "mm")),
                 use_raster = TRUE,
                 raster_device = "CairoPNG",
                 raster_quality = 10,
                 heatmap_legend_param = legends_param,
                 width = unit(h_width, "cm"),
                 height = unit(h_height, "cm"))
      
  draw(ht1, merge_legend = TRUE)
}

Focus on selected CG genes :

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, "ACTB", "GAPDH", "RPS26", "XIST"),
                                    day = "E3",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

All X-linked MethDep CG genes :

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, controls$external_gene_name, "RPS26"),
                                    day = "E3",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0)

All X-linked other than CG genes:

draw_embryo_exp_Petropoulos_dataset(c(X_linked),
                                    day = "E3",
                                    clustRow = T, 
                            #scale_lims = c(0, 500),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.5) 

chr1-linked genes:

draw_embryo_exp_Petropoulos_dataset(c(chr1_linked),
                                    day = "E3",
                                    clustRow = T, 
                            #scale_lims = c(0, 1),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.8) 

Some Female cells (usually corresponding to the same embryo) express no CG genes,

but they also express some control genes at lower expression level

  • bad quality embryos ? (unlikely as no link with number of detected genes or sequencing depth)

  • expression embryo-specific?

  • linked with ZGA that has not occured yet?

E4 embryos

Focus on selected CG genes :

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, "ACTB", "GAPDH", "RPS26", "XIST"),
                                    day = "E4",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

All X-linked MethDep CG genes :

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, controls$external_gene_name, "RPS26"),
                                    day = "E4",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0)

All X-linked other than CG genes:

draw_embryo_exp_Petropoulos_dataset(c(X_linked),
                                    day = "E4",
                                    clustRow = T, 
                            #scale_lims = c(0, 500),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.5) 

chr1-linked genes:

draw_embryo_exp_Petropoulos_dataset(c(chr1_linked),
                                    day = "E4",
                                    clustRow = T, 
                            #scale_lims = c(0, 1),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.8) 

E5 embryos

Focus on selected CG genes :

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, "ACTB", "GAPDH", "RPS26", "XIST"),
                                    day = "E5",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

All X-linked MethDep CG genes :

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, controls$external_gene_name, "RPS26"),
                                    day = "E5",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0)

All X-linked other than CG genes:

draw_embryo_exp_Petropoulos_dataset(c(X_linked),
                                    day = "E5",
                                    clustRow = T, 
                            #scale_lims = c(0, 500),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.5) 

chr1-linked genes:

draw_embryo_exp_Petropoulos_dataset(c(chr1_linked),
                                    day = "E5",
                                    clustRow = T, 
                            #scale_lims = c(0, 1),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.8) 

E6 embryos

Focus on selected CG genes :

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, "ACTB", "GAPDH", "RPS26", "XIST"),
                                    day = "E6",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

All X-linked MethDep CG genes :

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, controls$external_gene_name, "RPS26"),
                                    day = "E6",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0)

All X-linked other than CG genes:

draw_embryo_exp_Petropoulos_dataset(c(X_linked),
                                    day = "E6",
                                    clustRow = T, 
                            #scale_lims = c(0, 500),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.5) 

chr1-linked genes:

draw_embryo_exp_Petropoulos_dataset(c(chr1_linked),
                                    day = "E6",
                                    clustRow = T, 
                            #scale_lims = c(0, 1),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.8) 

E7 embryos

Focus on selected CG genes :

draw_embryo_exp_Petropoulos_dataset(c("MAGEC2", "MAGEB16", "VCX", "VCX2", "VCX3B", "DDX53", #"PAGE2", #"PAGE1", 
                                    controls$external_gene_name, "ACTB", "GAPDH", "RPS26", "XIST"),
                                    day = "E7",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0) 

All X-linked MethDep CG genes :

draw_embryo_exp_Petropoulos_dataset(c(my_genes$external_gene_name, controls$external_gene_name, "RPS26"),
                                    day = "E7",
                                    clustRow = T, 
                            scale_lims = c(0, 300),
                            detected_in_percent_of_cells = 0)

All X-linked other than CG genes:

draw_embryo_exp_Petropoulos_dataset(c(X_linked),
                                    day = "E7",
                                    clustRow = T, 
                            #scale_lims = c(0, 500),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.5) 

chr1-linked genes:

draw_embryo_exp_Petropoulos_dataset(c(chr1_linked),
                                    day = "E7",
                                    clustRow = T, 
                            #scale_lims = c(0, 1),
                            scale = T,
                            font_size = 6,
                            detected_in_percent_of_cells = 0.8) 

pseudo bulk

sce$ind_lineage <- paste0(sce$individual, '_',  sce$lineage)
dds <- scuttle::summarizeAssayByGroup(sce, 
                            ids = colData(sce)[, c("ind_lineage")],
                            statistics = "sum")

coldata <- tibble(ind_lineage = dds$ids,
                  ncells = dds$ncells) %>% 
  left_join(as_tibble(colData(sce)) %>% 
              dplyr::select(ind_lineage, day, stage, gender, lineage) %>% unique()) %>% 
  as.data.frame()


# remove E3
coldata <- coldata[coldata$day %in% c("E4", "E5", "E6", "E7"),]



#table(coldata$gender, coldata$ind_lineage)


# Remove embryo with ambigous gender
#coldata <- coldata[!coldata$ambigous, ]
rownames(coldata) <- coldata$ind_lineage

# table(ncells = coldata$ncells, day = coldata$day)
# table(ncells = coldata$ncells, day = coldata$day, coldata$gender)


number_cells_thr <- 3
n <- length(unique(dds$ids))
as_tibble(colData(dds)) %>% 
  mutate(day = substr(start = 1, stop = 2, ids),
         embryo = gsub("_.*", x = ids, replace = '' )) %>% 
  ggplot(aes( x = day, y = ncells, color = day)) +
  geom_jitter()  +
 # scale_color_manual(values = sample(scales::hue_pal()(n)))+
 theme(legend.position = "none") +
  geom_hline(yintercept = number_cells_thr, linetype = "dashed")

Remove Individuals with less than 3 cells

Removed day3 (ZGA not fully completed yet)

design = gender + lineage + day

# Remove Individual with less than x cells
coldata <- coldata[coldata$ncells >= number_cells_thr,]
#table(ncells = coldata$ncells, day = coldata$day, coldata$gender, coldata$lineage)

dds <- dds[, coldata$ind_lineage]
dds$day <- coldata$day
dds$stage <- coldata$stage
dds$gender <- coldata$gender
dds$lineage <- coldata$lineage
dds$ambigous <- coldata$ambigous


colData(dds)$group <- paste0(dds$day, "_", dds$lineage, "_", dds$gender)


dds <- DESeqDataSetFromMatrix(countData = assay(dds[, rownames(colData(dds))]), 
                              colData = colData(dds),
                              design = ~ gender + lineage + day) 



dds$day <- factor(dds$day)
dds$gender <- factor(dds$gender, c(levels = c("M", "F")))
table(dds$day, dds$gender)
##     
##       M  F
##   E4  7  7
##   E5 23 20
##   E6 10 17
##   E7 14 17
table(dds$lineage, dds$day, dds$gender)
## , ,  = M
## 
##                     
##                      E4 E5 E6 E7
##   epiblast            0  3  1  3
##   not applicable      7  8  0  0
##   primitive endoderm  0  2  2  4
##   trophectoderm       0 10  7  7
## 
## , ,  = F
## 
##                     
##                      E4 E5 E6 E7
##   epiblast            0  4  4  4
##   not applicable      7  7  0  0
##   primitive endoderm  0  2  4  3
##   trophectoderm       0  7  9 10
dds <- estimateSizeFactors(dds)
filtering_thr <- 10
keep <- rowSums(counts(dds, normalize = TRUE) >= filtering_thr ) >= 5
table(keep)
## keep
## FALSE  TRUE 
##  5832 18583
dds <- DESeq(dds[keep,])

#dds <- DESeq(dds)

as_tibble(colData(dds)) %>% 
  ggplot(aes(x = ids, y = sizeFactor, fill = gender)) +
  geom_col()+
  theme(axis.text.x = element_text(angle = 90, size = 5, hjust = 1, vjust = .5)) +
  facet_wrap(~ day, scales = "free_x", ncol = 4) 

PCA

## PCA
rld <- varianceStabilizingTransformation(dds)
rv <- rowVars(assay(rld))
selected <- order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(rld)[selected, ]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
percentVar_1 <- paste0("PC1: ", round(percentVar[1] * 100), "% variance")
percentVar_2 <- paste0("PC2: ", round(percentVar[2] * 100), "% variance")
percentVar_3 <- paste0("PC3: ", round(percentVar[3] * 100), "% variance")
percentVar_4 <- paste0("PC4: ", round(percentVar[4] * 100), "% variance")
pcaData <- as_tibble(pca$x, rownames = "samples") %>%
  dplyr::select(samples, "PC1", "PC2", "PC3", "PC4") %>%
  left_join(as_tibble(colData(dds), rownames = "samples"))

ggplot(pcaData, aes(x = PC1, y = PC2, col = lineage, shape = gender)) +
  geom_point(size = 2.5) +
  xlab(percentVar_1) +
  ylab(percentVar_2) +
  facet_wrap(~ day)

ggplot(pcaData, aes(x = PC1, y = PC2, col = sizeFactor, shape = gender)) +
  geom_point(size = 2.5) +
  scale_color_continuous(type = "viridis") +
  xlab(percentVar_1) +
  ylab(percentVar_2) +
  facet_wrap(~ day)

# ggplot(pcaData, aes(x = PC3, y = PC4, col = lineage, shape = gender)) +
#   geom_point(size = 2.5) +
#   xlab(percentVar_3) +
#   ylab(percentVar_4) +
#   facet_wrap(~ day)

=> Separation by day and by lineage

Volcano

load("~/cluster/CBIO-templates/ensembl_2_geneName/transcripts_infos.rda")
transcripts_infos <- transcripts_infos %>% filter(chromosome_name %in% c(1:22, "X", "Y", "MT")) %>% 
              dplyr::select(external_gene_name, chromosome_name) %>% unique()

res_dds <- results(dds, 
                   name = "gender_F_vs_M", 
                   independentFiltering = FALSE)
res_tbl <- as_tibble(res_dds, rownames = "external_gene_name") %>% 
  left_join(transcripts_infos) %>% 
  arrange(padj)




# res_dds <- results(dds, 
#                    name = "gender_female_vs_male", 
#                    independentFiltering = TRUE)

# res_shr <- lfcShrink(dds, coef = "gender_F_vs_M")
# res_shr_tbl <- as_tibble(res_shr, rownames = "external_gene_name") %>%
#   left_join(transcripts_infos) %>%
#   arrange(padj)

#save(res_tbl, file = "../tmp_data/res_tbl_gender_F_vs_M.rda")
#save(res_shr_tbl, file = "../tmp_data/res_shr_tbl_gender_F_vs_M.rda")


padj_thr <- 0.05
log2FC_thr <- 1

ggplot(res_tbl %>% filter(!is.na(padj)), aes(x = pvalue)) + 
    geom_histogram(bins = 20, boundary = 0) +
  ggtitle("pvalues histogram")

labeled_genes <- res_tbl %>% 
  filter(chromosome_name != "Y") %>% 
  filter(padj < padj_thr) %>% 
  filter(external_gene_name %in% CT_genes$external_gene_name) %>% 
  #filter(abs(log2FoldChange) > log2FC_thr ) %>% 
  arrange(padj) #%>% head(30)

res_tbl %>% 
  filter(!is.na(padj)) %>% 
  ggplot(aes(x= log2FoldChange, y = -log10(padj))) +
  geom_point(size = 0.5) +
    geom_point(data = res_tbl %>% 
               filter(chromosome_name == "X"), 
             aes(x= log2FoldChange, y = -log10(padj)), 
             color = "steelblue", size = 0.55) +
  geom_point(data = res_tbl %>% 
               filter(external_gene_name %in% CT_genes$external_gene_name), 
             aes(x= log2FoldChange, y = -log10(padj)), 
             color = "red", size = 0.55) +
  geom_point(data = res_tbl %>% 
               filter(chromosome_name == "Y"), 
             aes(x= log2FoldChange, y = -log10(padj)), 
             color = "black", size = 0.55) +
 # xlab("log2FoldChange") + 
  ylab("-log10(padj)") +
  ggrepel::geom_text_repel(data = res_tbl %>% filter(padj < 0.05 & log2FoldChange > 3),
                           aes(x = log2FoldChange, y = -log10(padj), label = external_gene_name),
                           size = 2,
                           segment.size = 0.1, max.overlaps = 30) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_hline(yintercept = -log10(padj_thr), linetype = "dashed", color = "gray70")

Volcano plot for paper

load("~/cluster/Packages/CTdata/eh_data/CT_genes.rda")

CTP_genes <- CT_genes %>% 
  filter(CT_gene_type == "CTP_gene")

CT_genes <- CT_genes %>% 
  filter(CT_gene_type == "CT_gene")

thr <- 1e-35
logFC_thr <- 8
res_tbl_modified <- res_tbl %>% 
  mutate(group = case_when(padj < thr ~ "padj_outlier",
                           abs(log2FoldChange) >= logFC_thr ~ "logFC_outlier",
                           padj > thr & abs(log2FoldChange) < logFC_thr ~ "others")) %>% 
  mutate(padj = ifelse(padj <= thr, thr, padj)) %>% 
  mutate(log2FoldChange = ifelse(log2FoldChange >= logFC_thr, logFC_thr, log2FoldChange)) %>% 
  filter(chromosome_name %in% c(1:22, "X", "Y")) %>% 
  mutate(chr = case_when(chromosome_name %in% c("X", "Y") ~ chromosome_name,
                         !chromosome_name %in% c("X", "Y") ~ "Autosome")) %>% 
  mutate(X_CG_MethDEp = case_when(external_gene_name %in% CT_genes$external_gene_name ~ TRUE,
                             external_gene_name %in% CT_genes$external_gene_name ~ FALSE)) %>% 
  filter(!is.na(padj)) %>% 
  filter(!is.na(group)) 
  #filter(abs(log2FoldChange) > 0.05)

labeled_X_genes <- res_tbl_modified %>% 
  filter(chr == "X") %>% 
  filter(abs(log2FoldChange) > 5 & padj < 0.00005) %>% 
  filter(!external_gene_name %in% CT_genes$external_gene_name) %>% 
  #filter(external_gene_name %in% CT_genes$external_gene_name) %>% 
  #filter(abs(log2FoldChange) > log2FC_thr ) %>% 
  arrange(padj) 

labeled_CT_genes <- res_tbl_modified %>% 
  #filter(chr != "Y") %>% 
  filter(external_gene_name %in% CT_genes_X_met & padj < 0.0005 & log2FoldChange > 2.5) %>% 
  #filter(external_gene_name %in% CT_genes$external_gene_name) %>% 
  #filter(abs(log2FoldChange) > log2FC_thr ) %>% 
  arrange(padj) 

X_sign_genes <- res_tbl_modified %>% 
  filter(chr == "X") %>% 
  filter(padj < padj_thr) %>% 
  arrange(padj) #%>% head(30)

Y_sign_genes <- res_tbl_modified %>% 
  filter(chr == "Y") %>% 
  filter(padj < padj_thr) %>% 
  filter(log2FoldChange < 0 ) %>% 
  arrange(padj) #%>% head(30)


set.seed = 123
fig <- res_tbl_modified %>% 
  ggplot(aes(x= log2FoldChange, y = -log10(padj))) +
  geom_point(data = res_tbl_modified,
             aes(x= log2FoldChange, y = -log10(padj), shape = group),
             size = 1, color = "gray80") + 
    geom_point(data = res_tbl_modified %>% 
               filter(external_gene_name %in% X_sign_genes$external_gene_name),
             aes(x= log2FoldChange, y = -log10(padj), shape = group),
             size = 1, color = "steelblue") +
  geom_point(data = res_tbl_modified %>% 
               filter(external_gene_name %in% Y_sign_genes$external_gene_name),
             aes(x= log2FoldChange, y = -log10(padj), shape = group),
             size = 1, color = "black") +
  geom_point(data = res_tbl_modified %>% 
               filter(external_gene_name %in% CT_genes_X_met),# & padj < padj_thr),
             aes(x= log2FoldChange, y = -log10(padj), shape = group),
             size = 1.2, color = "firebrick2") +
  scale_shape_manual(values = c(15, 19, 17))+
                       # c("padj_outlier" = 17,
                       #          "others" = 15,
                       #          "logFC_outlier" = 19))+
  xlab("log2FoldChange") + 
  ylab("-log10(padj)") +
  geom_vline(xintercept = 0, linetype = "dashed", 
             color = "black", linewidth = 0.2) +
  # geom_vline(xintercept = -1, linetype = "dashed",
  #            color = "black", linewidth = 0.2) +
  geom_hline(yintercept = -log10(padj_thr), 
             linetype = "dashed", color = "black", linewidth = 0.2) +
  ggrepel::geom_text_repel(data = subset(res_tbl_modified, external_gene_name %in% labeled_X_genes$external_gene_name),
                           aes(x = log2FoldChange, y = -log10(padj), label = external_gene_name),
                           size = 3, color = "steelblue",
                           segment.size = 0.1, max.overlaps = 10) +
   ggrepel::geom_text_repel(data = subset(res_tbl_modified, external_gene_name %in% labeled_CT_genes$external_gene_name),
                           aes(x = log2FoldChange, y = -log10(padj), label = external_gene_name),
                           size = 3, color = "firebrick2",
                           segment.size = 0.1, max.overlaps = 30) +
  xlim(-logFC_thr,logFC_thr) +
  ggtitle("Petropoulos dataset") +
  theme_bw() +
  theme(legend.position = "none",
        title = element_text(size = 10),
        panel.grid = element_blank()) 
fig

pdf(file = "../figs/FigS2_Volcano_Petropoulos_dataset.pdf", width = 4, height = 4)
fig 

dev.off()
## png 
##   2
  • Genes with a padjusted value < 10^{-35} are represented as triangles

  • Genes with an absolute log2FC > 8 are represented as squares

  • Red dots = X-linked MethDep genes

  • Blue dots = X-linked genes not CG genes

  • Black dots = Y-linked genes

  • Gray dots = other genes

# pdf(file = paste0("../figs/figs_CT_in_embryo/Fig_Female_vs_Male_volcano_Zhu.pdf"),
#     width = 5, height = 4)
# fig
# dev.off()
# #idem but shrinked logFC:
# 
# res_shr_tbl %>%
#   filter(!is.na(padj)) %>%
#   ggplot(aes(x= log2FoldChange, y = -log10(padj))) +
#   geom_point(size = 0.5) +
#   geom_point(size = 0.5) +
#   geom_point(data = res_shr_tbl %>%
#                filter(chromosome_name == "X"),
#              aes(x= log2FoldChange, y = -log10(padj)),
#              color = "steelblue", size = 0.55) +
#   geom_point(data = res_shr_tbl %>%
#                filter(external_gene_name %in% CT_genes$external_gene_name),
#              aes(x= log2FoldChange, y = -log10(padj)),
#              color = "red", size = 0.55) +
# 
#   geom_point(data = res_shr_tbl %>%
#                filter(chromosome_name == "Y"),
#              aes(x= log2FoldChange, y = -log10(padj)),
#              color = "black", size = 0.55) +
#   # xlab("log2FoldChange") +
#   ylab("-log10(padj)") +
#   ggrepel::geom_text_repel(data = subset(res_shr_tbl, external_gene_name %in% CT_genes$external_gene_name),
#                            aes(x = log2FoldChange, y = -log10(padj), label = external_gene_name),
#                            size = 2,
#                            segment.size = 0.1, max.overlaps = 30) +
#   geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
#   geom_hline(yintercept = -log10(padj_thr), linetype = "dashed", color = "gray70")
ordered_samples <- as_tibble(colData(dds)) %>% 
  arrange(gender, day) %>% 
  pull(ids)
plot_counts <- function(my_gene, order = ordered_samples){
  as_tibble(counts(dds[my_gene,], normalize = T), rownames = 'gene') %>%
  gather(ids, counts, -gene) %>% 
  left_join(as_tibble(colData(dds))) %>% 
  mutate(sample = factor(ids, levels = order)) %>% 
 # mutate(gene = mapIds(org.Hs.eg.db, ENSEMBL, "SYMBOL", "ENSEMBL")) %>% 
  # mutate(Gene = case_when(is.na(gene) ~ ENSEMBL,
  #                         !is.na(gene) ~ gene)) %>% 
  ggplot(aes(x = sample, y = counts, fill = gender)) +
  geom_bar(stat = 'identity')+
  facet_wrap( ~ gene, scales = "free") +
  theme(axis.text.x = element_text(size = 6, angle = 90, vjust = 0.5, hjust = 1)) +
  xlab(element_blank()) +
  ylab("Normalized counts") 
}
plot_counts("VCX2")

plot_counts(c("MAGEC2"))

# plot_counts(res_tbl %>% filter(padj < 0.05) %>% arrange(desc(log2FoldChange)) %>% pull(external_gene_name) %>% head(16))
# plot_counts("MAGEC2")
# plot_counts("XIST")
# plot_counts("POU4F1")



plot_counts_by_day <- function(my_gene, order = ordered_samples){
  as_tibble(counts(dds[my_gene,], normalize = T), rownames = 'gene') %>%
  gather(ids, counts, -gene) %>% 
  left_join(as_tibble(colData(dds))) %>% 
  mutate(sample = factor(ids, levels = order)) %>% 
 # mutate(gene = mapIds(org.Hs.eg.db, ENSEMBL, "SYMBOL", "ENSEMBL")) %>% 
  # mutate(Gene = case_when(is.na(gene) ~ ENSEMBL,
  #                         !is.na(gene) ~ gene)) %>% 
  ggplot(aes(x = sample, y = counts, fill = gender)) +
    #scale_fill_manual(values = list("M" = "steelblue", "F" = "deeppink"))+
  geom_bar(stat = 'identity')+
  facet_wrap( ~ day, scales = "free") +
  theme(axis.text.x = element_text(size = 6, angle = 90, vjust = 0.5, hjust = 1)) +
  xlab(element_blank()) +
  ylab("Normalized counts") 
}

plot_counts_by_day("MAGEC2")

# pdf(file = paste0("../figs/figs_CT_in_embryo/Fig_Female_vs_Male_volcano_petropoulos_E4_E7.pdf"),
#     width = 5, height = 4)
# fig
# dev.off()

Expression of all X-linked_methDep significantly DE between females and males

my_genes <- labeled_CT_genes %>% 
  filter(log2FoldChange > 2)
plot_counts <- function(my_gene, order = ordered_samples){
  as_tibble(counts(dds[my_gene,], normalize = T), rownames = 'gene') %>%
  gather(ids, counts, -gene) %>% 
  left_join(as_tibble(colData(dds))) %>% 
  mutate(sample = factor(ids, levels = order)) %>% 
 # mutate(gene = mapIds(org.Hs.eg.db, ENSEMBL, "SYMBOL", "ENSEMBL")) %>% 
  # mutate(Gene = case_when(is.na(gene) ~ ENSEMBL,
  #                         !is.na(gene) ~ gene)) %>% 
  ggplot(aes(x = sample, y = counts, fill = gender)) +
  geom_bar(stat = 'identity')+
  facet_wrap( ~ gene, scales = "free") +
  theme(axis.text.x = element_text(size = 0, angle = 90, vjust = 0.5, hjust = 1)) +
  xlab(element_blank()) +
  ylab("Normalized counts") 
}
plot_counts(my_genes$external_gene_name)

Expression heatmap

legends_param <- list(
  labels_gp = gpar(col = "black", fontsize = 8),
  title_gp = gpar(col = "black", fontsize = 8, fontface = "bold"),
  simple_anno_size = unit(0.1, "cm"),
  row_names_gp = gpar(fontsize = 1),
  annotation_name_side = "left",
  border = FALSE,
  border_gp = gpar(lwd = 0.1),
  grid_width = unit(0.1, "cm"),
  grid_height = unit(0.1, "cm"),
  legend_height = unit(0.1, "cm"))

legend_colors <- c(
  "#5E4FA2", "#3288BD", "#66C2A5", "#ABDDA4", "#E6F598",
  "#FFFFBF", "#FEE08B", "#FDAE61", "#F46D43", "#D53E4F",
  "#9E0142")

ensembl_gene <- data.frame(gene = all_genes$external_gene_name)
rownames(ensembl_gene) <- all_genes$ensembl_gene_id

draw_embryo_exp_Petropoulos_dataset <- function(genes, 
                                                font_size = 6,
                                                scale = FALSE,
                                                log_transform = TRUE,
                                                scale_lims = NULL,
                                                clustRow = TRUE,
                                                h_width = 8, 
                                                h_height = 8, 
                                                show_right_annot = TRUE,
                                                clust_method = "centroid"){
  

  
  # Top Annotations
  df_col <- colData(dds)
  df_col$sex <- df_col$gender
  levels(df_col$sex) <- c("Male", "Female")
  df_col <- df_col[order(df_col$sex, df_col$sex),]
  df_col$day_sex <- paste0(df_col$day, '_', df_col$gender)

  column_ha_sex = HeatmapAnnotation(
    sex = df_col$sex,
    col = list(sex = c("Female" = "deeppink", "Male" = "steelblue")),
        border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
  
  column_ha_day = HeatmapAnnotation(
    day = df_col$day,
    col = list(day = c("E4" = "aliceblue", "E4" = "moccasin", "E5" = "orange3", "E6" = "firebrick3", "E7" = "brown4")),
       # border = FALSE, 
    simple_anno_size = unit(0.4, "cm"), 
    annotation_legend_param = legends_param,
    annotation_name_gp = gpar(fontsize = 0)) 
    
  genes <- genes[genes %in% rownames(dds)]
  
  mat <- counts(dds[genes, ], normalize = TRUE)
  #rownames(mat) <- ensembl_gene[genes, ]
  
  units = "Counts"
  if (log_transform) {
    mat <- log1p(mat)
    units = "log1p(Counts)"
    }
  if (scale) {
    mat <- t(scale(t(mat), center = FALSE, scale = TRUE))
    units = "scaled_exp"
    }
  
  # gene_type <- CT_genes_types %>% 
  #   filter(gene %in% genes) %>% 
  #   mutate(gene = factor(gene, levels = genes)) %>% 
  #   arrange(gene)
  
  
  
  
  if (is.null(scale_lims)) scale_lims <- c(0, max(mat))
  heatmap_colors <- colorRamp2(
    seq(scale_lims[1], scale_lims[2], length = 11), legend_colors)
  
  
  ht1 <- Heatmap(mat[, rownames(df_col)],
                  name = units,
                 clustering_method_rows = "ward.D",
                 clustering_method_columns = "ward.D",
                 column_split = df_col$day,
                 row_gap = unit(c(1), "mm"),
                 col = heatmap_colors,
                 cluster_rows = clustRow,
                 cluster_columns = FALSE,
                 cluster_row_slices = FALSE,
                 cluster_column_slices = FALSE,
                 show_row_names = TRUE,
                 show_column_names = FALSE,
                 show_row_dend = FALSE,
                 column_names_gp = gpar(fontsize = 8),
                 column_names_centered = TRUE,
                 row_names_gp = gpar(fontsize = font_size),
                 row_names_side = "left",
                 border = TRUE,
                 border_gp = gpar(lwd = 0.5),
                 column_names_side = c("bottom"),
                 column_names_rot = 90,
                 column_title_gp = gpar(fontsize = 12),
                 row_title_gp = gpar(fontsize = 0),
                 top_annotation = c(column_ha_sex,
                                    #column_ha_day,
                                    gap = unit(0.1, "mm")),
                 use_raster = TRUE,
                 raster_device = "CairoPNG",
                 raster_quality = 10,
                 heatmap_legend_param = legends_param,
                 width = unit(h_width, "cm"),
                 height = unit(h_height, "cm"))
      
  draw(ht1, merge_legend = TRUE)
}

Expression Heatmap of all X-linked_methDep

draw_embryo_exp_Petropoulos_dataset(CT_genes_X_met, clustRow = T, log_transform = FALSE, scale_lims = c(0,200)) 

# #idem but log_transformed:
# draw_embryo_exp_Petropoulos_dataset(CT_genes_X_met, clustRow = T, log_transform = TRUE) 

Expression Heatmap of all X-linked_methDep significantly DE between females and males

and with logFC > 3

my_genes <- labeled_CT_genes %>% 
  filter(log2FoldChange > 3)

draw_embryo_exp_Petropoulos_dataset(my_genes$external_gene_name, 
                                    clustRow = T, 
                                    h_width = 8, 
                                    h_height = 4, 
                                    log_transform = F, 
                                    scale_lims = c(0, 200))

as_tibble(counts(dds[my_genes$external_gene_name,]), rownames = "gene") %>% 
  pivot_longer(names_to = "embryo", values_to = "counts", -gene) %>% 
  left_join(as_tibble(colData(dds), rownames = "embryo")) %>% 
  ggplot(aes(x = group, y = log1p(counts), color = gender)) +
  scale_color_manual(values = c("M" = "blue", "F" = "deeppink")) +
  #geom_jitter(size = 1) +
  geom_boxplot(alpha = 0, outliers = FALSE) +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

as_tibble(counts(dds["MAGEC2",]), rownames = "gene") %>% 
  pivot_longer(names_to = "embryo", values_to = "counts", -gene) %>% 
  left_join(as_tibble(colData(dds), rownames = "embryo")) %>% 
  ggplot(aes(x = group, y = log1p(counts), color = gender)) +
  scale_color_manual(values = c("M" = "blue", "F" = "deeppink")) +
  #geom_jitter(size = 1) +
  geom_boxplot(alpha = 0, outliers = FALSE) +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size = 5))

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scran_1.34.0                scater_1.34.1              
##  [3] scuttle_1.16.0              DropletUtils_1.26.0        
##  [5] SingleCellExperiment_1.28.1 DESeq2_1.46.0              
##  [7] SummarizedExperiment_1.36.0 Biobase_2.66.0             
##  [9] MatrixGenerics_1.18.1       matrixStats_1.5.0          
## [11] GenomicRanges_1.58.0        GenomeInfoDb_1.42.3        
## [13] IRanges_2.40.1              S4Vectors_0.44.0           
## [15] BiocGenerics_0.52.0         circlize_0.4.16            
## [17] CTexploreR_1.2.0            CTdata_1.6.0               
## [19] ComplexHeatmap_2.22.0       lubridate_1.9.4            
## [21] forcats_1.0.0               stringr_1.5.1              
## [23] purrr_1.0.4                 readr_2.1.5                
## [25] tidyr_1.3.1                 tibble_3.2.1               
## [27] ggplot2_3.5.1               tidyverse_2.0.0            
## [29] dplyr_1.1.4                
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3        rstudioapi_0.17.1        
##   [3] jsonlite_1.9.1            shape_1.4.6.1            
##   [5] magrittr_2.0.3            magick_2.8.6             
##   [7] ggbeeswarm_0.7.2          farver_2.1.2             
##   [9] rmarkdown_2.29            GlobalOptions_0.1.2      
##  [11] zlibbioc_1.52.0           vctrs_0.6.5              
##  [13] Cairo_1.6-2               memoise_2.0.1            
##  [15] DelayedMatrixStats_1.28.1 htmltools_0.5.8.1        
##  [17] S4Arrays_1.6.0            AnnotationHub_3.14.0     
##  [19] curl_6.2.2                BiocNeighbors_2.0.1      
##  [21] Rhdf5lib_1.28.0           SparseArray_1.6.2        
##  [23] rhdf5_2.50.2              sass_0.4.9               
##  [25] bslib_0.9.0               cachem_1.1.0             
##  [27] igraph_2.1.4              lifecycle_1.0.4          
##  [29] iterators_1.0.14          pkgconfig_2.0.3          
##  [31] rsvd_1.0.5                Matrix_1.7-3             
##  [33] R6_2.6.1                  fastmap_1.2.0            
##  [35] GenomeInfoDbData_1.2.13   clue_0.3-66              
##  [37] digest_0.6.37             colorspace_2.1-1         
##  [39] AnnotationDbi_1.68.0      dqrng_0.4.1              
##  [41] irlba_2.3.5.1             ExperimentHub_2.14.0     
##  [43] RSQLite_2.3.9             beachmat_2.22.0          
##  [45] labeling_0.4.3            filelock_1.0.3           
##  [47] timechange_0.3.0          httr_1.4.7               
##  [49] abind_1.4-8               compiler_4.4.1           
##  [51] bit64_4.6.0-1             withr_3.0.2              
##  [53] doParallel_1.0.17         BiocParallel_1.40.0      
##  [55] viridis_0.6.5             DBI_1.2.3                
##  [57] HDF5Array_1.34.0          R.utils_2.13.0           
##  [59] rappdirs_0.3.3            DelayedArray_0.32.0      
##  [61] bluster_1.16.0            rjson_0.2.23             
##  [63] tools_4.4.1               vipor_0.4.7              
##  [65] beeswarm_0.4.0            R.oo_1.27.0              
##  [67] glue_1.8.0                rhdf5filters_1.18.1      
##  [69] cluster_2.1.8.1           generics_0.1.3           
##  [71] gtable_0.3.6              tzdb_0.5.0               
##  [73] R.methodsS3_1.8.2         hms_1.1.3                
##  [75] metapod_1.14.0            BiocSingular_1.22.0      
##  [77] ScaledMatrix_1.14.0       XVector_0.46.0           
##  [79] ggrepel_0.9.6             BiocVersion_3.20.0       
##  [81] foreach_1.5.2             pillar_1.10.1            
##  [83] limma_3.62.2              BiocFileCache_2.14.0     
##  [85] lattice_0.22-6            bit_4.6.0                
##  [87] tidyselect_1.2.1          locfit_1.5-9.12          
##  [89] Biostrings_2.74.1         knitr_1.50               
##  [91] gridExtra_2.3             edgeR_4.4.2              
##  [93] xfun_0.51                 statmod_1.5.0            
##  [95] stringi_1.8.4             UCSC.utils_1.2.0         
##  [97] yaml_2.3.10               evaluate_1.0.3           
##  [99] codetools_0.2-20          BiocManager_1.30.25      
## [101] cli_3.6.4                 munsell_0.5.1            
## [103] jquerylib_0.1.4           Rcpp_1.0.14              
## [105] dbplyr_2.5.0              png_0.1-8                
## [107] parallel_4.4.1            blob_1.2.4               
## [109] sparseMatrixStats_1.18.0  viridisLite_0.4.2        
## [111] scales_1.3.0              crayon_1.5.3             
## [113] GetoptLong_1.0.5          rlang_1.1.5              
## [115] cowplot_1.1.3             KEGGREST_1.46.0